#  ██████╗ ██╗  ██╗██╗   ██╗██╗      ██████╗ ██████╗ ██╗  ██╗███████╗██████╗ ███████╗
#  ██╔══██╗██║  ██║╚██╗ ██╔╝██║     ██╔═══██╗██╔══██╗██║  ██║██╔════╝██╔══██╗██╔════╝
#  ██████╔╝███████║ ╚████╔╝ ██║     ██║   ██║██████╔╝███████║█████╗  ██████╔╝█████╗
#  ██╔═══╝ ██╔══██║  ╚██╔╝  ██║     ██║   ██║██╔═══╝ ██╔══██║██╔══╝  ██╔══██╗██╔══╝
#  ██║     ██║  ██║   ██║   ███████╗╚██████╔╝██║     ██║  ██║███████╗██║  ██║███████╗
#  ╚═╝     ╚═╝  ╚═╝   ╚═╝   ╚══════╝ ╚═════╝ ╚═╝     ╚═╝  ╚═╝╚══════╝╚═╝  ╚═╝╚══════╝

## PHYLOPHERE: A Nextflow pipeline including a complete set
## of phylogenetic comparative tools and analyses for Phenome-Genome studies

### Github: https://github.com/nozerorma/caastools/nf-phylophere
### Author: Miguel Ramon (miguel.ramon@upf.edu)

#### File: phenotype_exploration.Rmd

Phenotype Exploration

This script allows for the visualization of trait distributions and phylogenetic patterns. The script includes contrast plots and phylogenetic trees to assess trait relationships.

Setup and Directory Configuration

This section configures the working environment, sets directories, and loads necessary functions and libraries.

# Call the setup function from commons.R
source("./obj/commons.R")
## [DEBUG] args = cancer_traits_processed-LQ.csv | science.abn7829_data_s4.nex.tree | data_exploration | 1998 | primates | family | neoplasia_prevalence | adult_necropsy_count | neoplasia_necropsy | /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv | malignant_prevalence | LQ
## [DEBUG] seed_val = 1998
## [DEBUG] workingDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/97/c764c03c2ef0b32c52ef00eece748a
## [DEBUG] objDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/97/c764c03c2ef0b32c52ef00eece748a/obj
## [DEBUG] resultsDir = data_exploration
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## [1] "Working Directory: /home/miguel/IBE-UPF/PhD/PhyloPhere/work/97/c764c03c2ef0b32c52ef00eece748a"
## [1] "Results Directory: data_exploration"
## [DEBUG] Working Directory: /home/miguel/IBE-UPF/PhD/PhyloPhere/work/97/c764c03c2ef0b32c52ef00eece748a
## [DEBUG] Results Directory: data_exploration
## [DEBUG] data_exploration_dir = data_exploration/1.Data-exploration
## [DEBUG] species_distribution_dir = data_exploration/1.Data-exploration/1.Species_distribution
## [DEBUG] extreme_plots_dir = data_exploration/1.Data-exploration/2.Extreme_plots
## [DEBUG] asr_trees = data_exploration/1.Data-exploration/3.ASR_trees
## [DEBUG] phylo_distribution_dir = data_exploration/1.Data-exploration/4.Phylogenetic_distribution
## [DEBUG] ci_dir = data_exploration/1.Data-exploration/5.CI_overlaps
## [DEBUG] palettes loaded: primates=15, primates_dark=15, mammals=17
## [1] "Loading trait data from: cancer_traits_processed-LQ.csv"
## [DEBUG] trait_path = cancer_traits_processed-LQ.csv
## [DEBUG] trait_df rows = 47, cols = 19
## [DEBUG] trait_df columns: tax_id, species, common_name, family, adult_necropsy_count, neoplasia_necropsy, neoplasia_prevalence, benign_count, benign_prevalence, malignant_count, malignant_prevalence, body_mass, mass_g, log_body_mass, mature_f, mature_m, mature_age, MLS.anage, LQ
## [DEBUG] trait_df species unique = 47
## [DEBUG] tree_path = science.abn7829_data_s4.nex.tree
## [DEBUG] tree tips = 236, nodes = 235
## [DEBUG] clade_name = primates
## [DEBUG] taxon_of_interest = family
## [DEBUG] trait = neoplasia_prevalence
## [DEBUG] n_trait = neoplasia_necropsy
## [DEBUG] p_trait = adult_necropsy_count
## [DEBUG] has.p = TRUE, p missing = 0
## [DEBUG] has.n = TRUE, n missing = 0
## Warning: package 'ape' was built under R version 4.4.2
## 
## Attaching package: 'ape'
## The following object is masked from 'package:dplyr':
## 
##     where
## [DEBUG] tax_id_file = /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv
## [DEBUG] tax_id_df rows = 1290, distinct taxa = 807
## [DEBUG] has.TAX_ID = TRUE
## [DEBUG] trait_df merged with tax_id_df: rows = 47, missing tax_id = 0
## [DEBUG] normalized tax_id from merged columns, missing tax_id = 0
## [DEBUG] tree_ids rows = 40, missing tax_id = 0
## [DEBUG] common_tax_ids = 40
## [DEBUG] pruned_tree tips (TAX_ID) = 40, nodes = 39
## [DEBUG] trait_df after TAX_ID tree filter rows = 40
## [DEBUG] secondary_trait = malignant_prevalence
## [DEBUG] has.secondary = TRUE, missing = 0
## [DEBUG] branch_trait = LQ
## [DEBUG] has.branch = TRUE, missing = 0
setup_rmd()

# Debug helper (prints into HTML output)
if (is.null(getOption("phylo_debug"))) {
  options(phylo_debug = TRUE)
}
if (!exists("phylo_debug_log", envir = .GlobalEnv)) {
  phylo_debug_log <- character()
}
if (!exists("debug_log", inherits = TRUE)) {
  debug_log <- function(...) {
    msg <- sprintf(...)
    phylo_debug_log <<- c(phylo_debug_log, msg)
    cat("[DEBUG] ", msg, "\n", sep = "")
  }
}

# Load necessary libraries
library(dplyr)
library(ggplot2)
library(readr)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(ggrepel)
library(ggtree)
## Warning: package 'ggtree' was built under R version 4.4.2
## ggtree v3.14.0 Learn more at https://yulab-smu.top/contribution-tree-data/
## 
## Please cite:
## 
## Guangchuang Yu. Using ggtree to visualize data on tree-like structures.
## Current Protocols in Bioinformatics. 2020, 69:e96. doi:10.1002/cpbi.96
## 
## Attaching package: 'ggtree'
## The following object is masked from 'package:ape':
## 
##     rotate
## The following object is masked from 'package:tidyr':
## 
##     expand
library(ggnewscale)
library(ggstar)
library(colorspace)
library(treeio)
## Warning: package 'treeio' was built under R version 4.4.2
## treeio v1.30.0 Learn more at https://yulab-smu.top/contribution-tree-data/
## 
## Please cite:
## 
## LG Wang, TTY Lam, S Xu, Z Dai, L Zhou, T Feng, P Guo, CW Dunn, BR
## Jones, T Bradley, H Zhu, Y Guan, Y Jiang, G Yu. treeio: an R package
## for phylogenetic tree input and output with richly annotated and
## associated data. Molecular Biology and Evolution. 2020, 37(2):599-603.
## doi: 10.1093/molbev/msz240
library(tidytree)
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
## 
## Guangchuang Yu.  Data Integration, Manipulation and Visualization of
## Phylogenetic Trees (1st edition). Chapman and Hall/CRC. 2022,
## doi:10.1201/9781003279242
## 
## Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam.
## ggtree: an R package for visualization and annotation of phylogenetic
## trees with their covariates and other associated data. Methods in
## Ecology and Evolution. 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
## 
## Attaching package: 'tidytree'
## The following object is masked from 'package:treeio':
## 
##     getNodeNum
## The following objects are masked from 'package:ape':
## 
##     drop.tip, keep.tip
## The following object is masked from 'package:stats':
## 
##     filter
library(phylolm)
library(ggtreeExtra)
## Warning: package 'ggtreeExtra' was built under R version 4.4.2
## ggtreeExtra v1.16.0 For help: https://yulab-smu.top/treedata-book/
## 
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
## 
## S Xu, Z Dai, P Guo, X Fu, S Liu, L Zhou, W Tang, T Feng, M Chen, L
## Zhan, T Wu, E Hu, Y Jiang, X Bo, G Yu. ggtreeExtra: Compact
## visualization of richly annotated phylogenetic data. Molecular Biology
## and Evolution. 2021, 38(9):4039-4042. doi: 10.1093/molbev/msab166
library(geiger)
## Loading required package: phytools
## Loading required package: maps
## 
## Attaching package: 'phytools'
## The following object is masked from 'package:treeio':
## 
##     read.newick
## 
## Attaching package: 'geiger'
## The following object is masked from 'package:tidytree':
## 
##     treedata
## The following object is masked from 'package:treeio':
## 
##     treedata
library(rphylopic)
## You are using rphylopic v.1.5.0. Please remember to credit PhyloPic contributors (hint: `get_attribution()`) and cite rphylopic in your work (hint: `citation("rphylopic")`).
# Objects
color_palette <- paste0(clade_name, "_palette") # Color palette for the clade
capitalized_taxon <- tools::toTitleCase(taxon_of_interest) # Capitalized taxon name
capitalized_trait <- tools::toTitleCase(gsub("_", " ", trait)) # Capitalized and deconvoluted trait name
capitalized_clade <- tools::toTitleCase(gsub("_", " ", clade_name)) # Capitalized and deconvoluted clade name

createDir(extreme_plots_dir)
## [DEBUG] createDir: created data_exploration/1.Data-exploration/2.Extreme_plots
createDir(asr_trees)
## [DEBUG] createDir: created data_exploration/1.Data-exploration/3.ASR_trees
createDir(phylo_distribution_dir)
## [DEBUG] createDir: created data_exploration/1.Data-exploration/4.Phylogenetic_distribution

1. Data Import

This section imports trait summary data from the dataset exploration stage.

# Load summary stats from Dataset Exploration
stats_path <- file.path(species_distribution_dir, "trait_stats.csv")
if (!file.exists(stats_path)) {
  stop("Missing trait stats file: ", stats_path)
}
stats_df <- read.csv(stats_path, stringsAsFactors = FALSE)

1. Load Melted Trait Data

This section loads pre-processed trait data in a long (melted) format and adds common names for species.

# 1. LOAD STATS ------------------------------------------------------
# Excerpt of the trait stats data
head(stats_df)
##                species   family neoplasia_prevalence neoplasia_necropsy
## 1      Alouatta_caraya Atelidae           0.12500000                  4
## 2     Ateles_geoffroyi Atelidae           0.30232558                 13
## 3    Callimico_goeldii  Cebidae           0.21739130                 15
## 4 Callithrix_geoffroyi  Cebidae           0.09677419                  3
## 5   Callithrix_jacchus  Cebidae           0.02439024                  1
## 6     Cebuella_pygmaea  Cebidae           0.09756098                  4
##   adult_necropsy_count malignant_prevalence        LQ  taxa_mean taxa_median
## 1                   32           0.03125000 0.9846315 0.21366279  0.21366279
## 2                   43           0.18604651 1.3561723 0.21366279  0.21366279
## 3                   69           0.10144928 1.0393871 0.08696912  0.09677419
## 4                   31           0.03225806 0.9029786 0.08696912  0.09677419
## 5                   41           0.02439024 1.2363178 0.08696912  0.09677419
## 6                   41           0.04878049 1.1559844 0.08696912  0.09677419
##      taxa_sd  taxa_q25  taxa_q75      value    g_mean   g_median       g_sd
## 1 0.12538812 0.1693314 0.2579942 0.12500000 0.1152653 0.09716758 0.08067951
## 2 0.12538812 0.1693314 0.2579942 0.30232558 0.1152653 0.09716758 0.08067951
## 3 0.06460348 0.0400000 0.1176471 0.21739130 0.1152653 0.09716758 0.08067951
## 4 0.06460348 0.0400000 0.1176471 0.09677419 0.1152653 0.09716758 0.08067951
## 5 0.06460348 0.0400000 0.1176471 0.02439024 0.1152653 0.09716758 0.08067951
## 6 0.06460348 0.0400000 0.1176471 0.09756098 0.1152653 0.09716758 0.08067951
##        g_q25     g_q75 outlier extreme_outlier global_label taxa_outlier
## 1 0.04963592 0.1621053  normal          normal       normal       normal
## 2 0.04963592 0.1621053  normal          normal high_extreme       normal
## 3 0.04963592 0.1621053  normal          normal high_extreme       normal
## 4 0.04963592 0.1621053  normal          normal       normal       normal
## 5 0.04963592 0.1621053  normal          normal  low_extreme       normal
## 6 0.04963592 0.1621053  normal          normal       normal       normal
##   extreme_taxa_outlier   taxa_label
## 1               normal  low_extreme
## 2               normal high_extreme
## 3               normal high_extreme
## 4               normal       normal
## 5               normal  low_extreme
## 6               normal       normal
stats_df <- stats_df %>%
  dplyr::mutate(
    value = .data[[trait]],
    taxa = .data[[taxon_of_interest]],
    common_name = gsub("_", " ", species)
  )

if(isTRUE(has.p)){
  stats_df <- stats_df %>%   
    mutate(p = .data[[p_trait]])
  debug_log("contrast_plot.f using p_trait = %s, missing p = %d", p_trait, sum(is.na(stats_df$p)))
}
## [DEBUG] contrast_plot.f using p_trait = adult_necropsy_count, missing p = 0
# Ensure species order matches phylogeny
ordered_species <-  tidytree::as_tibble(pruned_tree) %>%
  dplyr::arrange(desc(.data$node)) %>%
  # Remove internal nodes by filtering only rows where label is not NA
  dplyr::filter(!is.na(.data$label)) %>%
  dplyr::pull(.data$label)

print(ordered_species)
##  [1] "Galago_moholi"              "Galago_senegalensis"       
##  [3] "Nycticebus_coucang"         "Nycticebus_pygmaeus"       
##  [5] "Microcebus_murinus"         "Propithecus_coquereli"     
##  [7] "Varecia_variegata"          "Varecia_rubra"             
##  [9] "Lemur_catta"                "Eulemur_macaco"            
## [11] "Hylobates_lar"              "Gorilla_gorilla"           
## [13] "Pan_troglodytes"            "Colobus_guereza"           
## [15] "Trachypithecus_francoisi"   "Trachypithecus_cristatus"  
## [17] "Trachypithecus_auratus"     "Cercopithecus_neglectus"   
## [19] "Mandrillus_sphinx"          "Papio_hamadryas"           
## [21] "Theropithecus_gelada"       "Macaca_nigra"              
## [23] "Macaca_silenus"             "Macaca_fuscata"            
## [25] "Macaca_fascicularis"        "Ateles_geoffroyi"          
## [27] "Alouatta_caraya"            "Saimiri_sciureus"          
## [29] "Sapajus_apella"             "Saguinus_imperator"        
## [31] "Saguinus_midas"             "Saguinus_bicolor"          
## [33] "Saguinus_oedipus"           "Leontopithecus_chrysomelas"
## [35] "Leontopithecus_rosalia"     "Callithrix_jacchus"        
## [37] "Callithrix_geoffroyi"       "Mico_argentatus"           
## [39] "Cebuella_pygmaea"           "Callimico_goeldii"
stats_df$species <- factor(stats_df$species, levels = ordered_species)
debug_log("contrast_plot.f ordered_species = %d", length(ordered_species))
## [DEBUG] contrast_plot.f ordered_species = 40
# Define ordered taxa for plotting based on phylogeny order
ordered_taxa <- ordered_species %>%
  sapply(function(sp) {
    idx <- which(stats_df$species == sp)
    if (length(idx) == 0) return(NA_character_)
    stats_df$taxa[idx[1]]
  }) %>%
  unname() %>%
  na.omit() %>%
  unique()

print(ordered_taxa)
##  [1] "Galagidae"       "Lorisidae"       "Cheirogaleidae"  "Indriidae"      
##  [5] "Lemuridae"       "Hylobatidae"     "Hominidae"       "Cercopithecidae"
##  [9] "Atelidae"        "Cebidae"
# Arrange stats_df by taxa
stats_df <- stats_df %>%
  dplyr::mutate(taxa = factor(taxa, levels = ordered_taxa)) %>%
  dplyr::arrange(taxa, .by_group = TRUE)

3. Contrast Plots

This section generates contrast plots to visualize the trait across taxa, highlighting extreme values and outliers.

## PLOT FUNCTION --------------------------------------------------------------
### 1. Contrast plot function
contrast_plot.f <- function(df) {
  stopifnot(all(c("species", taxon_of_interest, trait) %in% names(df)))
  palette_values <- get_palette_values()
  debug_log("contrast_plot.f rows = %d, trait = %s, taxon = %s", nrow(df), trait, taxon_of_interest)
  debug_log("contrast_plot.f columns: %s", paste(names(df), collapse = ", "))
  fill_scale <- if (is.null(palette_values)) {
    ggplot2::scale_fill_discrete()
  } else {
    ggplot2::scale_fill_manual(values = palette_values)
  }
  color_scale <- if (is.null(palette_values)) {
    ggplot2::scale_color_discrete()
  } else {
    ggplot2::scale_color_manual(values = palette_values)
  }

  global_median <- df$g_median
  
  ggplot(df, aes(x = value, y = taxa)) +
    ggplot2::scale_y_discrete(limits = rev(ordered_taxa)) +
    ggplot2::geom_point(
      data = subset(df, global_label == "normal"),
      aes(shape = global_label, color = taxa), size = 5
    ) +
    ggplot2::geom_point(
      data = subset(df, global_label %in% c("low_extreme", "high_extreme")),
      aes(shape = global_label, color = taxa, fill = taxa), size = 5
    ) +
    ggplot2::scale_shape_manual(values = c("low_extreme" = 15, "normal" = 1, "high_extreme" = 17)) +
    ggplot2::scale_fill_manual(values = palette_values, breaks = 0) +
    ggplot2::scale_color_manual(values = palette_values, breaks = 0) +
    ggplot2::geom_vline(xintercept = global_median, linetype = "longdash", linewidth = 1.2, color = "salmon3") +
    ggplot2::stat_summary(fun = median, geom = "errorbar",
                          aes(xmax = after_stat(x), xmin = after_stat(x), y = taxa),
                          linewidth = 1.2, color = "skyblue4", alpha = 0.8) +
    ggplot2::labs(x = capitalized_trait, 
                  y = paste(capitalized_taxon, "-", capitalized_clade), 
                  title = paste("Trait differential plot for trait:", capitalized_trait, "in", clade_name),
                  caption = "Global median shown as a longdash red line. Per taxa median shown as skyblue error bars. 
                  Extreme values were identified based on global thresholds (top and bottom quantiles).") +
    ggrepel::geom_text_repel(
      data = subset(df, global_label %in% c("low_extreme", "high_extreme")),
      aes(label = species), size = 5, hjust = 0, vjust = 0, family = "Inter",
      segment.size = 0.3, nudge_x = 0.010, nudge_y = 0.5, max.overlaps = Inf,
      force = 1, direction = "y", max.iter = 10000, label.padding = 20,
      min.segment.length = 0.06, seed = seed_val, show.legend = FALSE
    ) +
    ggplot2::theme_minimal() +
    ggplot2::theme(
      plot.title = ggplot2::element_text(size = 20, hjust = 0.5, margin = ggplot2::margin(t = 20), face = "bold"),
      axis.text.y = ggplot2::element_text(size = 15),
      axis.title.x = ggplot2::element_text(size = 15, hjust = 0.5, margin = ggplot2::margin(t = 20, b = 20)),
      axis.title.y = ggplot2::element_text(size = 15, angle = 90, hjust = 0.5, margin = ggplot2::margin(l = 20, r = 20)),
      caption = ggplot2::element_text(size = 12, hjust = 0.5, margin = ggplot2::margin(t = 20)),
      legend.text = ggplot2::element_text(size = 15),
      legend.position = "bottom"
    )
}

# 3. CONTRAST PLOTS  ------------------------------------------------------
plot_ready <- !is.null(stats_df) && nrow(stats_df) > 0
required_cols <- c("species", taxon_of_interest, trait)
plot_ready <- plot_ready && all(required_cols %in% names(stats_df))

if (!plot_ready) {
  message("Skipping contrast plot: missing or empty trait stats.")
} else {
  contrast_graph <- contrast_plot.f(stats_df)
  ggplot2::ggsave(
    file.path(extreme_plots_dir, paste0(trait, "_contrast_plot.png")),
    contrast_graph, device = "png", width = 15, height = 10, dpi = "retina"
  )
  contrast_graph
}
## [DEBUG] contrast_plot.f rows = 40, trait = neoplasia_prevalence, taxon = family
## [DEBUG] contrast_plot.f columns: species, family, neoplasia_prevalence, neoplasia_necropsy, adult_necropsy_count, malignant_prevalence, LQ, taxa_mean, taxa_median, taxa_sd, taxa_q25, taxa_q75, value, g_mean, g_median, g_sd, g_q25, g_q75, outlier, extreme_outlier, global_label, taxa_outlier, extreme_taxa_outlier, taxa_label, taxa, common_name, p
## Warning in ggrepel::geom_text_repel(data = subset(df, global_label %in% :
## Ignoring unknown parameters: `label.padding`
## Warning in plot_theme(plot): The `caption` theme element is not defined in the element hierarchy.
## The `caption` theme element is not defined in the element hierarchy.
## The `caption` theme element is not defined in the element hierarchy.

4. Violin Plots

This section generates violin plots to show trait distributions across taxa.

### PLOT FUNCTION --------------------------------------------------------------
violin_extremes.f <- function(df, trait, taxon_of_interest) {
  stopifnot(all(c("species", taxon_of_interest, trait) %in% names(df)))
  palette_values <- get_palette_values()
  debug_log("violin_extremes.f rows = %d, trait = %s, taxon = %s", nrow(df), trait, taxon_of_interest)
  fill_scale <- if (is.null(palette_values)) {
    ggplot2::scale_fill_discrete()
  } else {
    ggplot2::scale_fill_manual(values = palette_values)
  }
  dark_palette_values <- get_palette_values(paste0("dark_", color_palette))
  fill_scale_dark <- if (is.null(dark_palette_values)) {
    fill_scale
  } else {
    ggplot2::scale_fill_manual(values = dark_palette_values)
  }
  color_scale_dark <- if (is.null(dark_palette_values)) {
    ggplot2::scale_color_discrete()
  } else {
    ggplot2::scale_color_manual(values = dark_palette_values)
  }

  # Ensure species order matches phylogeny
  ordered_species <- pruned_tree$tip.label
  df$species <- factor(df$species, levels = ordered_species)
  debug_log("violin_extremes.f ordered_species = %d", length(ordered_species))
  
  # Define ordered taxa for plotting using global phylogeny order if available
  if (!exists("ordered_taxa", inherits = TRUE) || length(ordered_taxa) == 0) {
    ordered_taxa <- unique(pruned_tree$tip.label %>%
      sapply(function(sp) df$taxa[df$species == sp]) %>%
      na.omit())
    if (length(ordered_taxa) == 0) {
      ordered_taxa <- unique(df$taxa[!is.na(df$taxa)])
    }
  }
  
  # Calculate median value for reference line
  median_value <- median(df$value, na.rm = TRUE)
  
  # Create violin plot
  plot <- ggplot2::ggplot(data = df, aes(x = value, y = taxa)) +
    ggplot2::scale_y_discrete(limits = rev(ordered_taxa)) +
    ggplot2::geom_violin(
      aes(fill = taxa), color = "black", width = 0.5, trim = TRUE, scale = "width", alpha = 0.15
    ) +
    fill_scale_dark +
    ggplot2::geom_point(
      data = subset(df, outlier == "normal"),
      aes(shape = global_label, color = taxa),
      size = 3, position = ggplot2::position_jitter(height = 0.25)
    ) +
    ggplot2::geom_point(
      data = subset(df, outlier != "normal"),
      aes(color = taxa, fill = taxa),
      shape = 21, size = 3, stroke = 0.8, position = ggplot2::position_jitter(height = 0.25)
    ) +
    ggplot2::scale_shape_manual(values = c("low_extreme" = 15, "normal" = 1, "high_extreme" = 17)) +
    color_scale_dark +
    ggplot2::geom_vline(
      aes(xintercept = median_value), linewidth = 1, color = "salmon3", alpha = 0.5
    ) +
    ggplot2::stat_summary(
      fun = median, geom = "errorbar",
      aes(xmax = after_stat(x), xmin = after_stat(x)),
      linewidth = 1.5, color = "black", alpha = 0.6, width = 0.75
    ) +
    ggplot2::labs(x = capitalized_trait) +
    ggplot2::theme_minimal() +
    ggplot2::theme(
      axis.text.y = ggplot2::element_text(size = 17, hjust = 0.5, face = "bold"),
      axis.title.x = ggplot2::element_text(size = 17, hjust = 0.5, face = "bold", margin = ggplot2::margin(t = 20, b = 20)),
      axis.title.y = ggplot2::element_blank(),
      legend.text = ggplot2::element_text(size = 15),
      legend.position = "none"
    )
  
  return(plot)
}

if (!plot_ready) {
  message("Skipping violin plot: missing or empty trait stats.")
} else {
  violin_plot_path <- file.path(extreme_plots_dir, paste0(trait, "_violin_plot.png"))
  violin_graph <- violin_extremes.f(stats_df, trait, taxon_of_interest)
  ggplot2::ggsave(
    violin_plot_path,
    violin_graph, device = "png", width = 7, height = 10, dpi = "retina"
  )
  violin_graph
}
## [DEBUG] violin_extremes.f rows = 40, trait = neoplasia_prevalence, taxon = family
## [DEBUG] violin_extremes.f ordered_species = 40
## Warning: Groups with fewer than two datapoints have been dropped.
## ℹ Set `drop = FALSE` to consider such groups for position adjustment purposes.
## Groups with fewer than two datapoints have been dropped.
## ℹ Set `drop = FALSE` to consider such groups for position adjustment purposes.
## Groups with fewer than two datapoints have been dropped.
## ℹ Set `drop = FALSE` to consider such groups for position adjustment purposes.
## Groups with fewer than two datapoints have been dropped.
## ℹ Set `drop = FALSE` to consider such groups for position adjustment purposes.
## Groups with fewer than two datapoints have been dropped.
## ℹ Set `drop = FALSE` to consider such groups for position adjustment purposes.
## Groups with fewer than two datapoints have been dropped.
## ℹ Set `drop = FALSE` to consider such groups for position adjustment purposes.

5. Ancestral State Reconstruction (ASR) Plots

This section generates ASR plots to visualize the evolutionary history of the trait across the phylogenetic

# 4. ASR PLOTS  ------------------------------------------------------
asr_tree.f <- function(df, tree, trait, taxon_of_interest) {
  
  # --- Trait vector ---
  trait_vec <- df[[trait]]
  names(trait_vec) <- df$species
  trait_vec <- trait_vec[!is.na(trait_vec) & is.finite(trait_vec)]
  if(length(trait_vec) < 3) stop("Need >=3 non-missing species.")
  debug_log("asr_tree.f input rows = %d, trait_vec = %d", nrow(df), length(trait_vec))
  

  tree <- ape::drop.tip(tree, setdiff(tree$tip.label, names(trait_vec)))
  tree <- ape::reorder.phylo(tree, "cladewise")
  Ntip <- length(tree$tip.label)
  Nnode <- tree$Nnode
  debug_log("asr_tree.f tree tips after prune = %d, nodes = %d", Ntip, Nnode)
  node_indices <- (Ntip+1):(Ntip+Nnode)

  # Sanitize trait vector to match pruned tree
  trait_vec <- trait_vec[tree$tip.label]

  # Also sanitize the data frame
  df <- df[df$species %in% tree$tip.label, ]
  df$species <- factor(df$species, levels = tree$tip.label)

  debug_log("asr_tree.f trait_vec after prune = %d", length(trait_vec))
  debug_log("asr_tree.f df after prune = %d", nrow(df))
  
  # --- Fit BM / lambda ---
  fit_model <- function(tree, trait, model=c("BM","lambda")) {
    model <- match.arg(model)
    df <- data.frame(trait=trait, species=names(trait))
    fit <- phylolm(trait~1, data=df, phy=tree, model=ifelse(model=="BM","BM","lambda"))
    aic <- fit$aic; k <- fit$p
    aicc <- aic + (2*k*(k+1))/(length(trait)-k-1)
    list(lnL=fit$logLik, sigma2=fit$sigma2, lambda=fit$optpar, aicc=aicc, raw=fit)
  }
  
  fit_bm <- fit_model(tree, trait_vec, "BM")
  fit_lambda <- fit_model(tree, trait_vec, "lambda")
  debug_log("asr_tree.f fit_bm aicc = %.3f, fit_lambda aicc = %.3f", fit_bm$aicc, fit_lambda$aicc)
  
  delta_aicc <- fit_bm$aicc - fit_lambda$aicc
  chosen_model <- ifelse(fit_lambda$aicc < fit_bm$aicc, "lambda", "BM")
  if(abs(delta_aicc)<2 && !is.null(fit_lambda$lambda) && abs(fit_lambda$lambda-1)<0.05) chosen_model <- "BM"
  debug_log("asr_tree.f chosen_model = %s", chosen_model)
  
  # --- Rescale tree & ASR ---
  if(chosen_model=="lambda"){
    lambda_hat <- fit_lambda$lambda
    if (is.null(lambda_hat) || is.na(lambda_hat)) {
      lambda_hat <- 1
    }
    debug_log("asr_tree.f lambda_hat = %.4f", lambda_hat)
    sigma2_used <- fit_lambda$sigma2
    rescaled_tree <- phytools::rescale(tree,"lambda",lambda=lambda_hat)
  } else {
    sigma2_used <- fit_bm$sigma2
    rescaled_tree <- phytools::rescale(tree,"BM")
  }
  debug_log("asr_tree.f sigma2_used = %.6f", sigma2_used)
  
  # Recalculate node_indices for rescaled tree (structure may be same, but let's be safe)
  Ntip_rescaled <- length(rescaled_tree$tip.label)
  Nnode_rescaled <- rescaled_tree$Nnode
  node_indices <- (Ntip_rescaled+1):(Ntip_rescaled+Nnode_rescaled)

  # Get point estimates via ACE with fallback to the original tree
  ace_try <- function(tree_obj) {
    tryCatch(
      ape::ace(x = trait_vec, phy = tree_obj, type = "continuous", method = "REML"),
      error = function(e) tryCatch(
        ape::ace(x = trait_vec, phy = tree_obj, type = "continuous", method = "ML"),
        error = function(e2) NULL
      )
    )
  }
  ace_res <- ace_try(rescaled_tree)
  model_used <- chosen_model
  if (is.null(ace_res)) {
    message("ACE failed on rescaled tree; retrying on original tree.")
    ace_res <- ace_try(tree)
    if (is.null(ace_res)) {
      message("ACE failed on original tree; falling back to fastAnc without CIs.")
      anc_est <- tryCatch(
        phytools::fastAnc(rescaled_tree, trait_vec),
        error = function(e) NULL
      )
      if (is.null(anc_est)) {
        debug_log("asr_tree.f fastAnc failed")
      } else {
        debug_log("asr_tree.f fastAnc ok, n = %d", length(anc_est))
      }
      if (is.null(anc_est)) {
        stop("ACE and fastAnc failed.")
      }
      ace_res <- list(
        ace = anc_est,
        CI95 = cbind(anc_est, anc_est)
      )
    } else {
      rescaled_tree <- tree
      Ntip_rescaled <- length(rescaled_tree$tip.label)
      Nnode_rescaled <- rescaled_tree$Nnode
      node_indices <- (Ntip_rescaled+1):(Ntip_rescaled+Nnode_rescaled)
      model_used <- "BM"
    }
  }
  
  node_est <- ace_res$ace
  tip_est <- trait_vec[rescaled_tree$tip.label]
  debug_log("asr_tree.f node_est = %d, tip_est = %d", length(node_est), length(tip_est))
  
  # --- Build node and tip tables ---
  # Use analytical CIs from ace
  node_ci <- ace_res$CI95
  node_df <- data.frame(
    node = node_indices,
    estimate = as.numeric(node_est[as.character(node_indices)]),
    ci_lower = as.numeric(node_ci[,1]),
    ci_upper = as.numeric(node_ci[,2]),
    stringsAsFactors = FALSE
  )
  # Tips don't have analytical CIs in ace output, use point estimates
  tip_df <- data.frame(
    tip = rescaled_tree$tip.label,
    estimate = as.numeric(tip_est[rescaled_tree$tip.label]),
    ci_lower = as.numeric(tip_est[rescaled_tree$tip.label]),
    ci_upper = as.numeric(tip_est[rescaled_tree$tip.label]),
    immediate_parent = NA,
    cumulative_derived = NA,
    direction = NA,
    parent_node_id = NA,
    parent_ci_lower = NA,
    parent_ci_upper = NA,
    stringsAsFactors = FALSE
  )
  
  # --- Derivedness tests: immediate-parent and cumulative-ancestors ---
  # Build parent map (use rescaled tree dimensions)
  parent_map <- integer(Ntip_rescaled + Nnode_rescaled)
  parent_map[] <- NA_integer_
  for (j in seq_len(nrow(rescaled_tree$edge))) {
    parent_map[rescaled_tree$edge[j,2]] <- rescaled_tree$edge[j,1]
  }
  
  # Test tips for derivedness
  for (i in seq_len(nrow(tip_df))) {
    tip_label <- tip_df$tip[i]
    tip_est_val <- tip_df$estimate[i]
    tip_idx <- which(rescaled_tree$tip.label == tip_label)
    parent_id <- parent_map[tip_idx]
    
    if (!is.na(parent_id)) {
      # Store parent node ID
      tip_df$parent_node_id[i] <- parent_id
      
      # Get parent node CI
      if (parent_id <= Ntip_rescaled) {
        parent_row <- tip_df[tip_df$tip == rescaled_tree$tip.label[parent_id], ]
      } else {
        parent_row <- node_df[node_df$node == parent_id, ]
      }
      parent_ci_low <- parent_row$ci_lower
      parent_ci_high <- parent_row$ci_upper
      
      # Immediate-parent derivedness test
      tip_df$immediate_parent[i] <- (tip_est_val > parent_ci_high) || (tip_est_val < parent_ci_low)
      
      if (tip_est_val > parent_ci_high) {
        tip_df$direction[i] <- "up"
      } else if (tip_est_val < parent_ci_low) {
        tip_df$direction[i] <- "down"
      } else {
        tip_df$direction[i] <- "none"
      }
      
      if (tip_df$immediate_parent[i]) {
        tip_df$parent_ci_lower[i] <- parent_ci_low
        tip_df$parent_ci_upper[i] <- parent_ci_high
      }
    }
    
    # Cumulative derivedness: derived from ALL ancestors
    anc_list <- integer(0)
    cur <- tip_idx
    while (!is.na(parent_map[cur])) {
      cur <- parent_map[cur]
      anc_list <- c(anc_list, cur)
    }
    anc_nodes <- anc_list[anc_list > Ntip_rescaled]
    
    if (length(anc_nodes) == 0) {
      tip_df$cumulative_derived[i] <- FALSE
    } else {
      anc_rows <- node_df[node_df$node %in% anc_nodes, ]
      if (nrow(anc_rows) == 0) {
        tip_df$cumulative_derived[i] <- NA
      } else {
        tip_df$cumulative_derived[i] <- all((tip_est_val > anc_rows$ci_upper) | (tip_est_val < anc_rows$ci_lower))
      }
    }
  }
  
  # --- Plot ---
  cont_obj <- phytools::contMap(rescaled_tree, trait_vec, plot=FALSE)
  debug_log("asr_tree.f contMap range = [%.4f, %.4f]", cont_obj$lims[1], cont_obj$lims[2])
  cont_obj <- phytools::setMap(cont_obj, colors=colorRampPalette(c("white","salmon3"))(100))
  h <- max(phytools::nodeHeights(cont_obj$tree))

  par(mar=c(5,5,10,7), oma=c(0,0,2,0))

  plot(cont_obj, fsize=1.8, lwd=6, res=320, legend=FALSE,
      xlim=c(-0.2*h, 2*h),
      cex.main=2.5)

  lwd_bar <- 10
  root_node_idx <- node_indices[1]
  root_est_str <- sprintf("%.2f (%.2f-%.2f)", 
                          node_df$estimate[1], 
                          node_df$ci_lower[1], 
                          node_df$ci_upper[1])
  
  phytools::add.color.bar(
    Ntip_rescaled - 1, 
    cont_obj$cols, 
    title = paste0(trait, " (", model_used, ")"),
    subtitle = "",
    lims=NULL,
    lwd=lwd_bar, 
    direction="upwards",
    x=-0.2*h, 
    y=1,
    prompt=FALSE, 
    fsize=1.5
  )
  
  # Add ticks and labels to color bar
  tick_x <- -0.2*h + lwd_bar / 20
  lines(x = rep(tick_x, 2), y = c(1, Ntip_rescaled))
  nticks <- 10
  
  Y <- seq(1, Ntip_rescaled, length.out = nticks)
  X <- cbind(rep(tick_x, nticks), rep(tick_x + 0.02*h, nticks))
  ticks <- seq(cont_obj$lims[1], cont_obj$lims[2], length.out = nticks)
  text(x = X[, 2], y = Y, labels = round(ticks, 3), pos = 4, cex = 1.5)
  
  # Add root annotation
  text(x = 0.02*h, y = Ntip_rescaled/1.45, labels = root_est_str, pos = 4, cex = 1.5, col = "black")
  
  # Collect parent nodes that have derived tips
  parent_nodes_with_derived_tips <- unique(tip_df$parent_node_id[tip_df$immediate_parent & !is.na(tip_df$parent_node_id)])
  
  # Add labels ONLY for parent nodes of derived tips
  for (parent_node in parent_nodes_with_derived_tips) {
    if (parent_node > Ntip_rescaled) {
      # It's an internal node
      parent_row <- node_df[node_df$node == parent_node, ]
      if (nrow(parent_row) > 0) {
        parent_est <- parent_row$estimate
        parent_ci_low <- parent_row$ci_lower
        parent_ci_high <- parent_row$ci_upper

        # Create label: estimate (ci_lower-ci_upper)
        parent_label <- sprintf("%.2f (%.2f-%.2f)", parent_est, parent_ci_low, parent_ci_high)

        # Display at the parent node
        ape::nodelabels(text = parent_label, node = parent_node,
                        frame = "none", cex = 1.5, col = "black", adj = c(-0.25, 0.4))
      }
    }
  }
    
  # Add tip symbols and labels
  for(i in seq_len(Ntip_rescaled)){
    tip_pos <- i
    
    if(tip_df$immediate_parent[i]){
      # Tip symbol for derived tips
      pch_val <- ifelse(tip_df$direction[i]=="up", 24, 25)
      bg_val <- ifelse(tip_df$direction[i]=="up", "salmon3", "skyblue3")
      ape::tiplabels(pch=pch_val, bg=bg_val, tip=tip_pos, cex=2, offset=58)
      
      # Tip value (larger to match parent node emphasis)
      ape::tiplabels(text=sprintf("%.2f", tip_df$estimate[i]), 
                    tip=tip_pos, frame="none", cex=1.5, offset=40)
      
      # Connection line from tip to tip value
      segments(x0 = h, y0 = tip_pos-0.4, x1 = h + 70, y1 = tip_pos-0.4, 
              col = "gray50", lty = 2)
    }
    
    # Cumulative derived marker
    if(tip_df$cumulative_derived[i]){
      ape::tiplabels(pch=22, bg="forestgreen", tip=tip_pos, cex=2, offset=62)
    }
  }
  
  mtext(paste0("Symbols: ▲ Tip Up, ▼ Tip Down, ■ Cumulatively derived; ",
              "Parent node values shown for derived tips."),
        side=3, line=-2, cex=1.2, adj=0.35)
  
  
  # --- Return ---
  list(
    chosen_model=chosen_model,
    node_table=node_df,
    tip_table=tip_df,
    sigma2_used=sigma2_used,
    ace=ace_res,
    rescaled_tree=rescaled_tree
  )
}

if (!plot_ready) {
  message("Skipping ASR plot: missing or empty trait stats.")
} else {
  asr_plot_path <- file.path(asr_trees, paste0(trait, "_asr_tree.png"))
  tryCatch(
    {
      grDevices::png(
        filename = asr_plot_path,
        width = 17, height = 20, res = 320, bg = "white", units = "in"
      )
      asr_result <- asr_tree.f(stats_df, pruned_tree, trait, taxon_of_interest)
      grDevices::dev.off()
    },
    error = function(e) {
      message("ASR plot file failed: ", e$message)
      try(grDevices::dev.off(), silent = TRUE)
    }
  )
  tryCatch(
    asr_tree.f(stats_df, pruned_tree, trait, taxon_of_interest),
    error = function(e) message("ASR report plot failed: ", e$message)
  )
}
## [DEBUG] asr_tree.f input rows = 40, trait_vec = 40
## [DEBUG] asr_tree.f tree tips after prune = 40, nodes = 39
## [DEBUG] asr_tree.f trait_vec after prune = 40
## [DEBUG] asr_tree.f df after prune = 40
## [DEBUG] asr_tree.f fit_bm aicc = -72.498, fit_lambda aicc = -88.723
## [DEBUG] asr_tree.f chosen_model = lambda
## [DEBUG] asr_tree.f lambda_hat = 0.5700
## [DEBUG] asr_tree.f sigma2_used = 0.000126
## [DEBUG] asr_tree.f node_est = 39, tip_est = 40
## [DEBUG] asr_tree.f contMap range = [0.0000, 0.3023]
## [DEBUG] asr_tree.f input rows = 40, trait_vec = 40
## [DEBUG] asr_tree.f tree tips after prune = 40, nodes = 39
## [DEBUG] asr_tree.f trait_vec after prune = 40
## [DEBUG] asr_tree.f df after prune = 40
## [DEBUG] asr_tree.f fit_bm aicc = -72.498, fit_lambda aicc = -88.723
## [DEBUG] asr_tree.f chosen_model = lambda
## [DEBUG] asr_tree.f lambda_hat = 0.5700
## [DEBUG] asr_tree.f sigma2_used = 0.000126
## [DEBUG] asr_tree.f node_est = 39, tip_est = 40
## [DEBUG] asr_tree.f contMap range = [0.0000, 0.3023]

## $chosen_model
## [1] "lambda"
## 
## $node_table
##    node   estimate   ci_lower  ci_upper
## 1    41 0.14623473 0.07800692 0.2144625
## 2    42 0.12690762 0.07424590 0.1795693
## 3    43 0.12478526 0.07812529 0.1714452
## 4    44 0.11833327 0.07340375 0.1632628
## 5    45 0.10426635 0.06367056 0.1448621
## 6    46 0.10487640 0.06419512 0.1455577
## 7    47 0.10495571 0.06306556 0.1468459
## 8    48 0.09250352 0.04728177 0.1377253
## 9    49 0.09208642 0.04608405 0.1380888
## 10   50 0.08845051 0.03919697 0.1377041
## 11   51 0.10976603 0.05521258 0.1643195
## 12   52 0.08415091 0.03787129 0.1304305
## 13   53 0.08320406 0.03627344 0.1301347
## 14   54 0.07891410 0.03006871 0.1277595
## 15   55 0.11683206 0.06785792 0.1658062
## 16   56 0.13895532 0.08402754 0.1938831
## 17   57 0.12088425 0.07063623 0.1711323
## 18   58 0.09215630 0.04684096 0.1374716
## 19   59 0.08359632 0.03905823 0.1281344
## 20   60 0.07707426 0.03409627 0.1200523
## 21   61 0.07366448 0.02810202 0.1192269
## 22   62 0.07365543 0.02569223 0.1216186
## 23   63 0.07268110 0.02491605 0.1204462
## 24   64 0.07485870 0.02973485 0.1199826
## 25   65 0.07063119 0.02135892 0.1199035
## 26   66 0.08935705 0.03931862 0.1393955
## 27   67 0.08142196 0.02805279 0.1347911
## 28   68 0.07769006 0.02330546 0.1320746
## 29   69 0.13465667 0.07745506 0.1918583
## 30   70 0.15159589 0.08741394 0.2157778
## 31   71 0.15606777 0.09328381 0.2188517
## 32   72 0.17955431 0.12025054 0.2388581
## 33   73 0.19073649 0.13075234 0.2507206
## 34   74 0.19314881 0.13141142 0.2548862
## 35   75 0.19465452 0.12812457 0.2611845
## 36   76 0.18024125 0.11972506 0.2407574
## 37   77 0.13291509 0.06694640 0.1988838
## 38   78 0.15083015 0.08372373 0.2179366
## 39   79 0.09636540 0.02962005 0.1631107
## 
## $tip_table
##                           tip   estimate   ci_lower   ci_upper immediate_parent
## 1           Callimico_goeldii 0.21739130 0.21739130 0.21739130             TRUE
## 2            Cebuella_pygmaea 0.09756098 0.09756098 0.09756098            FALSE
## 3             Mico_argentatus 0.05000000 0.05000000 0.05000000            FALSE
## 4        Callithrix_geoffroyi 0.09677419 0.09677419 0.09677419            FALSE
## 5          Callithrix_jacchus 0.02439024 0.02439024 0.02439024             TRUE
## 6      Leontopithecus_rosalia 0.12222222 0.12222222 0.12222222            FALSE
## 7  Leontopithecus_chrysomelas 0.11764706 0.11764706 0.11764706            FALSE
## 8            Saguinus_oedipus 0.17164179 0.17164179 0.17164179             TRUE
## 9            Saguinus_bicolor 0.04000000 0.04000000 0.04000000            FALSE
## 10             Saguinus_midas 0.00000000 0.00000000 0.00000000             TRUE
## 11         Saguinus_imperator 0.00000000 0.00000000 0.00000000             TRUE
## 12             Sapajus_apella 0.11538462 0.11538462 0.11538462            FALSE
## 13           Saimiri_sciureus 0.07758621 0.07758621 0.07758621            FALSE
## 14            Alouatta_caraya 0.12500000 0.12500000 0.12500000            FALSE
## 15           Ateles_geoffroyi 0.30232558 0.30232558 0.30232558             TRUE
## 16        Macaca_fascicularis 0.05405405 0.05405405 0.05405405            FALSE
## 17             Macaca_fuscata 0.09302326 0.09302326 0.09302326            FALSE
## 18             Macaca_silenus 0.08823529 0.08823529 0.08823529            FALSE
## 19               Macaca_nigra 0.02777778 0.02777778 0.02777778            FALSE
## 20       Theropithecus_gelada 0.01923077 0.01923077 0.01923077             TRUE
## 21            Papio_hamadryas 0.04854369 0.04854369 0.04854369            FALSE
## 22          Mandrillus_sphinx 0.08888889 0.08888889 0.08888889            FALSE
## 23    Cercopithecus_neglectus 0.07142857 0.07142857 0.07142857            FALSE
## 24     Trachypithecus_auratus 0.03703704 0.03703704 0.03703704            FALSE
## 25   Trachypithecus_cristatus 0.03571429 0.03571429 0.03571429            FALSE
## 26   Trachypithecus_francoisi 0.13333333 0.13333333 0.13333333            FALSE
## 27            Colobus_guereza 0.10309278 0.10309278 0.10309278            FALSE
## 28            Pan_troglodytes 0.16842105 0.16842105 0.16842105            FALSE
## 29            Gorilla_gorilla 0.20895522 0.20895522 0.20895522            FALSE
## 30              Hylobates_lar 0.15909091 0.15909091 0.15909091            FALSE
## 31             Eulemur_macaco 0.30000000 0.30000000 0.30000000             TRUE
## 32                Lemur_catta 0.15573770 0.15573770 0.15573770            FALSE
## 33              Varecia_rubra 0.18644068 0.18644068 0.18644068            FALSE
## 34          Varecia_variegata 0.21052632 0.21052632 0.21052632            FALSE
## 35      Propithecus_coquereli 0.16000000 0.16000000 0.16000000            FALSE
## 36         Microcebus_murinus 0.24561404 0.24561404 0.24561404             TRUE
## 37        Nycticebus_pygmaeus 0.26530612 0.26530612 0.26530612             TRUE
## 38         Nycticebus_coucang 0.08333333 0.08333333 0.08333333             TRUE
## 39        Galago_senegalensis 0.01515152 0.01515152 0.01515152             TRUE
## 40              Galago_moholi 0.09375000 0.09375000 0.09375000            FALSE
##    cumulative_derived direction parent_node_id parent_ci_lower parent_ci_upper
## 1                TRUE        up             47      0.06306556       0.1468459
## 2               FALSE      none             49              NA              NA
## 3               FALSE      none             49              NA              NA
## 4               FALSE      none             50              NA              NA
## 5                TRUE      down             50      0.03919697       0.1377041
## 6               FALSE      none             51              NA              NA
## 7               FALSE      none             51              NA              NA
## 8               FALSE        up             53      0.03627344       0.1301347
## 9               FALSE      none             54              NA              NA
## 10               TRUE      down             54      0.03006871       0.1277595
## 11               TRUE      down             52      0.03787129       0.1304305
## 12              FALSE      none             55              NA              NA
## 13              FALSE      none             55              NA              NA
## 14              FALSE      none             56              NA              NA
## 15               TRUE        up             56      0.08402754       0.1938831
## 16              FALSE      none             62              NA              NA
## 17              FALSE      none             62              NA              NA
## 18              FALSE      none             63              NA              NA
## 19              FALSE      none             63              NA              NA
## 20               TRUE      down             65      0.02135892       0.1199035
## 21              FALSE      none             65              NA              NA
## 22              FALSE      none             64              NA              NA
## 23              FALSE      none             59              NA              NA
## 24              FALSE      none             68              NA              NA
## 25              FALSE      none             68              NA              NA
## 26              FALSE      none             67              NA              NA
## 27              FALSE      none             66              NA              NA
## 28              FALSE      none             70              NA              NA
## 29              FALSE      none             70              NA              NA
## 30              FALSE      none             69              NA              NA
## 31               TRUE        up             74      0.13141142       0.2548862
## 32              FALSE      none             74              NA              NA
## 33              FALSE      none             75              NA              NA
## 34              FALSE      none             75              NA              NA
## 35              FALSE      none             76              NA              NA
## 36               TRUE        up             76      0.11972506       0.2407574
## 37               TRUE        up             78      0.08372373       0.2179366
## 38              FALSE      down             78      0.08372373       0.2179366
## 39               TRUE      down             79      0.02962005       0.1631107
## 40              FALSE      none             79              NA              NA
## 
## $sigma2_used
## [1] 0.0001262749
## 
## $ace
## 
##     Ancestral Character Estimation
## 
## Call: ape::ace(x = trait_vec, phy = tree_obj, type = "continuous", 
##     method = "REML")
## 
##     Residual log-likelihood: 330.1333 
## 
## $ace
##         41         42         43         44         45         46         47 
## 0.14623473 0.12690762 0.12478526 0.11833327 0.10426635 0.10487640 0.10495571 
##         48         49         50         51         52         53         54 
## 0.09250352 0.09208642 0.08845051 0.10976603 0.08415091 0.08320406 0.07891410 
##         55         56         57         58         59         60         61 
## 0.11683206 0.13895532 0.12088425 0.09215630 0.08359632 0.07707426 0.07366448 
##         62         63         64         65         66         67         68 
## 0.07365543 0.07268110 0.07485870 0.07063119 0.08935705 0.08142196 0.07769006 
##         69         70         71         72         73         74         75 
## 0.13465667 0.15159589 0.15606777 0.17955431 0.19073649 0.19314881 0.19465452 
##         76         77         78         79 
## 0.18024125 0.13291509 0.15083015 0.09636540 
## 
## $sigma2
## [1] 1.260865e-04 9.623954e-05
## 
## $CI95
##          [,1]      [,2]
## 41 0.07800692 0.2144625
## 42 0.07424590 0.1795693
## 43 0.07812529 0.1714452
## 44 0.07340375 0.1632628
## 45 0.06367056 0.1448621
## 46 0.06419512 0.1455577
## 47 0.06306556 0.1468459
## 48 0.04728177 0.1377253
## 49 0.04608405 0.1380888
## 50 0.03919697 0.1377041
## 51 0.05521258 0.1643195
## 52 0.03787129 0.1304305
## 53 0.03627344 0.1301347
## 54 0.03006871 0.1277595
## 55 0.06785792 0.1658062
## 56 0.08402754 0.1938831
## 57 0.07063623 0.1711323
## 58 0.04684096 0.1374716
## 59 0.03905823 0.1281344
## 60 0.03409627 0.1200523
## 61 0.02810202 0.1192269
## 62 0.02569223 0.1216186
## 63 0.02491605 0.1204462
## 64 0.02973485 0.1199826
## 65 0.02135892 0.1199035
## 66 0.03931862 0.1393955
## 67 0.02805279 0.1347911
## 68 0.02330546 0.1320746
## 69 0.07745506 0.1918583
## 70 0.08741394 0.2157778
## 71 0.09328381 0.2188517
## 72 0.12025054 0.2388581
## 73 0.13075234 0.2507206
## 74 0.13141142 0.2548862
## 75 0.12812457 0.2611845
## 76 0.11972506 0.2407574
## 77 0.06694640 0.1988838
## 78 0.08372373 0.2179366
## 79 0.02962005 0.1631107
## 
## 
## $rescaled_tree
## 
## Phylogenetic tree with 40 tips and 39 internal nodes.
## 
## Tip labels:
##   Callimico_goeldii, Cebuella_pygmaea, Mico_argentatus, Callithrix_geoffroyi, Callithrix_jacchus, Leontopithecus_rosalia, ...
## 
## Rooted; includes branch length(s).

6. Fan Plot for Phylogenetic Distribution

This section creates a fan-shaped phylogenetic tree.

# 5. PHYLOGENETIC DISTRIBUTION PLOT  ------------------------------------------------------
palette_values <- get_palette_values()
fill_scale <- if (is.null(palette_values)) {
  ggplot2::scale_fill_discrete()
} else {
  ggplot2::scale_fill_manual(values = palette_values)
}
color_scale <- if (is.null(palette_values)) {
  ggplot2::scale_color_discrete()
} else {
  ggplot2::scale_color_manual(values = palette_values)
}

# Create base phylogenetic tree in fan layout
base_fan_tree <- ggtree(pruned_tree,
                        layout="fan",
                        open.angle=15, 
                        size=2)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
ggsave(file.path(phylo_distribution_dir, "big_tree.png"), base_fan_tree, device = "png", width = 15, height = 15, dpi = "retina")

ordered_species <- rev(pruned_tree$tip.label)
row.names(stats_df) <- stats_df$species

# Prepare trait data for tree visualization
tree_trait_data <- stats_df %>%
  group_by(species, taxa)

# Is there a branch color trait specified?
if (isTRUE(has.branch)) {
  tree_trait_data <- tree_trait_data %>%
    mutate(
      br_trait = .data[[branch_trait]]
    )

  # Perform ancestral state reconstruction for LQ trait
  br_trait_vec <- tree_trait_data$br_trait
  names(br_trait_vec) <- tree_trait_data$species
  br_asr_fit <- phytools::fastAnc(pruned_tree, br_trait_vec, vars = TRUE, CI = TRUE)

  # Create data frames for tip and node values
  tip_br_data <- data.frame(
    node = nodeid(pruned_tree, names(br_trait_vec)),
    BR = br_trait_vec
  )
  node_br_data <- data.frame(
    node = names(br_asr_fit$ace), 
    BR = br_asr_fit$ace
  )
  combined_br_data <- rbind(tip_br_data, node_br_data)
  combined_br_data$node <- as.numeric(combined_br_data$node)
}
  
# This section is problematic. We have three optional colums: p, branch_trait, and second_trait.
if (isTRUE(has.p) && isTRUE(has.secondary)) { # Now for both has.p and has.secondary TRUE
  tree_trait_data <- tree_trait_data %>%
    dplyr::rename(secondary_value = .data[[secondary_trait]]) %>%
    summarize(
      trait_sum = sum(value, na.rm = TRUE),
      p_sum = sum(p, na.rm = TRUE),
      secondary_sum = sum(secondary_value, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    ungroup()
} else if (isTRUE(has.p)) {
  tree_trait_data <- tree_trait_data %>%
    summarize(
      trait_sum = sum(value, na.rm = TRUE),
      p_sum = sum(p, na.rm = TRUE),
      .groups = "drop") %>%
    ungroup()
} else if (isTRUE(has.secondary)) { # If has.secondary are TRUE, we would need to adjust accordingly.
  tree_trait_data <- tree_trait_data %>%
    dplyr::rename(secondary_value = .data[[secondary_trait]]) %>%
    summarize(
      trait_sum = sum(value, na.rm = TRUE),
      secondary_sum = sum(secondary_value, na.rm = TRUE),
      .groups = "drop") %>%
    ungroup()
} else { # If neither is TRUE  
  tree_trait_data <- tree_trait_data %>%
    summarize(
      trait_sum = sum(value, na.rm = TRUE),
      .groups = "drop") %>%
    ungroup()
}
## Warning: Use of .data in tidyselect expressions was deprecated in tidyselect 1.2.0.
## ℹ Please use `all_of(var)` (or `any_of(var)`) instead of `.data[[var]]`
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Convert tree to tibble and join with trait data
tree_tibble <- tidytree::as_tibble(pruned_tree) %>%
  dplyr::rename(species = label)

tree_with_traits <- full_join(tree_tibble, tree_trait_data, by = 'species')

# Again, if branch color trait is present, join that data as well
if (isTRUE(has.branch)) {
  tree_with_traits <- full_join(tree_with_traits, combined_br_data, by = 'node')
}

print(tree_with_traits)
## # A tibble: 79 × 9
##    parent  node branch.length species  taxa  trait_sum p_sum secondary_sum    BR
##     <int> <dbl>         <dbl> <chr>    <fct>     <dbl> <int>         <dbl> <dbl>
##  1     47     1        10.4   Callimi… Cebi…    0.217     69        0.101  1.04 
##  2     49     2         3.48  Cebuell… Cebi…    0.0976    41        0.0488 1.16 
##  3     49     3         3.48  Mico_ar… Cebi…    0.05      20        0      0.846
##  4     50     4         0.665 Callith… Cebi…    0.0968    31        0.0323 0.903
##  5     50     5         0.665 Callith… Cebi…    0.0244    41        0.0244 1.24 
##  6     51     6         0.728 Leontop… Cebi…    0.122     90        0.0556 1.43 
##  7     51     7         0.728 Leontop… Cebi…    0.118     51        0.118  1.00 
##  8     53     8         3.25  Saguinu… Cebi…    0.172    134        0.0746 1.28 
##  9     54     9         1.53  Saguinu… Cebi…    0.04      25        0.04   0.909
## 10     54    10         1.53  Saguinu… Cebi…    0         22        0      0.984
## # ℹ 69 more rows
# Export dataframe for debug
write.csv(tree_with_traits, file.path(phylo_distribution_dir, "tree_with_traits.csv"), row.names = FALSE)

# Convert to treedata object for ggtree
tree_data_object <- tidytree::as.treedata(tree_with_traits)

# Create diagnostic plot showing node numbers and family colors
diagnostic_tree <- ggtree(tree_data_object,
                          layout = "fan",
                          open.angle = 15,
                          size = 2) +
  geom_text2(aes(label = node), hjust = -0.5, vjust = -0.5, size = 6) +
  aes(color = taxa) +
  color_scale
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
diagnostic_tree

# Save diagnostic tree
ggsave(file.path(phylo_distribution_dir, "diagnostic_tree.png"), diagnostic_tree, device = "png", width = 15, height = 15, dpi = "retina")

# If branch color trait is present, add it to the tree plot
if (isTRUE(has.branch)) {
  capitalized_branch_trait <- tools::toTitleCase(gsub("_", " ", branch_trait))
  # Create base tree with BR gradient
  tree_plot_step1 <- ggtree(tree_data_object, aes(color = BR),
                                  layout="fan",
                                  open.angle=15, 
                                  size=2) +
    scale_color_gradient(
      name = capitalized_branch_trait, 
      low = "skyblue", high = "salmon3",     
                        guide = guide_colorbar(order = 3)) +  # BR last
    new_scale_color()
} else {
  # Add trait bars with gradient
  tree_plot_step1 <- ggtree(tree_data_object,
                                  layout="fan",
                                  open.angle=15, 
                                  size=2)
}  
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
tree_plot_step1 <- tree_plot_step1 +
  # Trait bars
  geom_fruit(
    geom = geom_col,
    mapping = aes(y = species, x = trait_sum, fill = trait_sum, width = 0.5),
    alpha = 0.5,  # Set transparency for lighter appearance
    show.legend = TRUE,
    pwidth = 0.75,
    color = "black",  # Add black contour
    size = 0.7,
    axis.params = list(
      axis = 'x',
      text.size = 8,
      nbreak = 2,
      text.angle = 270,
      vjust = 0.5,
      hjust = 0,
      limits = c(0, 40)
    ),
    grid.params = list()
  ) +
  scale_fill_gradient2(
    name = capitalized_trait, 
    low = "white", 
    high = "darkseagreen4",  # Different gradient color for distinction
    guide = guide_colorbar(order = 1),
    aesthetics = "fill"
  ) +
  new_scale_fill()
## Warning in geom_col(data = structure(list(parent = c(47, 49, 49, 50, 50, :
## Ignoring unknown aesthetics: width
  # If secondary trait is present, add secondary trait bars
if (isTRUE(has.secondary)) {
  capitalized_secondary_trait <- tools::toTitleCase(gsub("_", " ", secondary_trait))

  tree_plot_step1 <- tree_plot_step1 +
    geom_fruit(
      geom = geom_col,
      offset = -0.75,
      mapping = aes(y = species, x = secondary_sum, fill = secondary_sum, width = 0.5),
      alpha = 0.5,  # Set transparency for lighter appearance
      show.legend = TRUE,
      pwidth = 0.75,
      color = "black",  # Add black contour
      size = 0.7,
      axis.params = list(
        axis = 'x',
        text.size = 0,
        nbreak = 1,
        text.angle = 270,
        vjust = 0.5,
        hjust = 0,
        limits = c(0, 20)
      ),
      grid.params = list()
    ) +
    scale_fill_gradient2(
      name = capitalized_secondary_trait, 
      low = "white", 
      high = "salmon3",  # Different gradient color for distinction
      guide = guide_colorbar(order = 2),
      aesthetics = "fill"
    ) +
    new_scale_fill()
}
## Warning in geom_col(data = structure(list(parent = c(47, 49, 49, 50, 50, :
## Ignoring unknown aesthetics: width
tree_plot_step1

# Add taxa membership indicator bars
tree_plot_step2 <- tree_plot_step1 +
  geom_fruit(geom = geom_col, 
             mapping = aes(y = species, x = 1, fill = taxa),
             pwidth = 0.1, color = "black", linewidth = 0.5, offset = 0) +
  scale_fill_manual(values = palette_values, breaks = 0) +
  new_scale_fill() +
  theme_tree() +
  theme(panel.background = element_rect(fill = "transparent", colour = NA),  
        plot.background = element_rect(fill = "transparent", colour = NA),
        legend.position = "none",
        plot.title = element_text(size = 17, margin = margin(t = 0, r = 0, b = -1.25, l = 0, unit = "cm")),
        plot.subtitle = element_text(size = 15, margin = margin(t = 0, r = 0, b = -1.5, l = 0, unit = "cm"))
  )

tree_plot_step2

# Add p labels (if applicable)
if(isTRUE(has.p)){
  tree_plot_step3 <- tree_plot_step2 +
    geom_text(aes(label = p_sum), nudge_x = 6.1, fontface = "bold", size = 6, vjust = 0.5)
} else {
  tree_plot_step3 <- tree_plot_step2
}

tree_plot_step3
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_text()`).

# Finalize tree with taxon labels and phylopic images
# Build taxon to tip mapping
tip_df <- find_taxon_mrca(pruned_tree, tree_with_traits)

# Get phylopic data
tip_df <- tip_df %>%
  rowwise() %>%  # Apply function row by row
  mutate(phylopic_UUID = rphylopic::get_uuid(name = taxa, n = 1))

# Save for debug
write.csv(tip_df, file.path(phylo_distribution_dir, "taxon_node_mapping.csv"), row.names = FALSE)

# Add family labels and phylopic images
final_tree_plot <- tree_plot_step3 +
  geom_cladelab(data = tip_df,
                mapping = aes(
                  node = mrca_node,
                  label = taxa,
                  image = phylopic_UUID,
                  color = taxa),
                geom="phylopic",
                barsize = NA,
                offset = 12,
                imagesize = 0.05,
                alpha = 0.75) +
  scale_color_manual(values = palette_values, breaks = 0) +
  geom_cladelab(data = tip_df,
                mapping = aes(
                  node = mrca_node,
                  label = taxa),
                show.legend = FALSE,
                          color = "black",
                angle = "auto",
                horizontal = TRUE,
                offset = 10,
                barsize = NA,
                fontsize = 9.5,
                fontface = "bold") +
  theme_tree() +
  theme(panel.background = element_rect(fill = "transparent", colour = NA),  
        plot.background = element_rect(fill = "transparent", colour = NA),
        legend.position = "right",
        legend.spacing.y = unit(1.5, 'cm'),
        legend.title = element_text(
          size = 15, face = "bold", 
          margin = margin(b = 15)),
        legend.key.size = unit(1, 'cm'),
        legend.text = element_text(size = 13),
        plot.title = element_text(size = 17, margin = margin(t = 0, r = 0, b = -1.25, l = 0, unit = "cm")),
        plot.subtitle = element_text(size = 15, margin = margin(t = 0, r = 0, b = -1.5, l = 0, unit = "cm"))
  )
## ! The "taxa" has(have) been found in tree data. You might need to rename the
## variable(s) in the data of "geom_cladelab" to avoid this warning!
## ! The "taxa" has(have) been found in tree data. You might need to rename the
## variable(s) in the data of "geom_cladelab" to avoid this warning!
final_tree_plot
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_text()`).
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Removed 10 rows containing missing values or values outside the scale range
## (`geom_segment()`).

# Save progressive tree plots
ggsave(file.path(phylo_distribution_dir, "tree_with_trait_bars.png"), tree_plot_step1, device = "png", width = 15, height = 15, dpi = "retina")
ggsave(file.path(phylo_distribution_dir, "tree_with_taxa_bars.png"), tree_plot_step2, device = "png", width = 15, height = 15, dpi = "retina")
if(isTRUE(has.p)){
  ggsave(file.path(phylo_distribution_dir, "tree_with_p_labels.png"), tree_plot_step3, device = "png", width = 15, height = 15, dpi = "retina")
}
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_text()`).
ggsave(file.path(phylo_distribution_dir, "final_tree_plot.png"), final_tree_plot, device = "png", width = 17, height = 15, dpi = "retina")
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_text()`).
## Removed 10 rows containing missing values or values outside the scale range
## (`geom_segment()`).
## Removed 10 rows containing missing values or values outside the scale range
## (`geom_segment()`).

Session Information

if (length(phylo_debug_log) > 0) {
  cat("### Debug log\n")
  cat(paste0("[DEBUG] ", phylo_debug_log, "\n"))
}

Debug log

[DEBUG] args = cancer_traits_processed-LQ.csv | science.abn7829_data_s4.nex.tree | data_exploration | 1998 | primates | family | neoplasia_prevalence | adult_necropsy_count | neoplasia_necropsy | /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv | malignant_prevalence | LQ [DEBUG] seed_val = 1998 [DEBUG] workingDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/97/c764c03c2ef0b32c52ef00eece748a [DEBUG] objDir = /home/miguel/IBE-UPF/PhD/PhyloPhere/work/97/c764c03c2ef0b32c52ef00eece748a/obj [DEBUG] resultsDir = data_exploration [DEBUG] Working Directory: /home/miguel/IBE-UPF/PhD/PhyloPhere/work/97/c764c03c2ef0b32c52ef00eece748a [DEBUG] Results Directory: data_exploration [DEBUG] data_exploration_dir = data_exploration/1.Data-exploration [DEBUG] species_distribution_dir = data_exploration/1.Data-exploration/1.Species_distribution [DEBUG] extreme_plots_dir = data_exploration/1.Data-exploration/2.Extreme_plots [DEBUG] asr_trees = data_exploration/1.Data-exploration/3.ASR_trees [DEBUG] phylo_distribution_dir = data_exploration/1.Data-exploration/4.Phylogenetic_distribution [DEBUG] ci_dir = data_exploration/1.Data-exploration/5.CI_overlaps [DEBUG] palettes loaded: primates=15, primates_dark=15, mammals=17 [DEBUG] trait_path = cancer_traits_processed-LQ.csv [DEBUG] trait_df rows = 47, cols = 19 [DEBUG] trait_df columns: tax_id, species, common_name, family, adult_necropsy_count, neoplasia_necropsy, neoplasia_prevalence, benign_count, benign_prevalence, malignant_count, malignant_prevalence, body_mass, mass_g, log_body_mass, mature_f, mature_m, mature_age, MLS.anage, LQ [DEBUG] trait_df species unique = 47 [DEBUG] tree_path = science.abn7829_data_s4.nex.tree [DEBUG] tree tips = 236, nodes = 235 [DEBUG] clade_name = primates [DEBUG] taxon_of_interest = family [DEBUG] trait = neoplasia_prevalence [DEBUG] n_trait = neoplasia_necropsy [DEBUG] p_trait = adult_necropsy_count [DEBUG] has.p = TRUE, p missing = 0 [DEBUG] has.n = TRUE, n missing = 0 [DEBUG] tax_id_file = /home/miguel/IBE-UPF/PhD/PhyloPhere/Data/5.Phylogeny/taxid_species_family.tsv [DEBUG] tax_id_df rows = 1290, distinct taxa = 807 [DEBUG] has.TAX_ID = TRUE [DEBUG] trait_df merged with tax_id_df: rows = 47, missing tax_id = 0 [DEBUG] normalized tax_id from merged columns, missing tax_id = 0 [DEBUG] tree_ids rows = 40, missing tax_id = 0 [DEBUG] common_tax_ids = 40 [DEBUG] pruned_tree tips (TAX_ID) = 40, nodes = 39 [DEBUG] trait_df after TAX_ID tree filter rows = 40 [DEBUG] secondary_trait = malignant_prevalence [DEBUG] has.secondary = TRUE, missing = 0 [DEBUG] branch_trait = LQ [DEBUG] has.branch = TRUE, missing = 0 [DEBUG] createDir: created data_exploration/1.Data-exploration/2.Extreme_plots [DEBUG] createDir: created data_exploration/1.Data-exploration/3.ASR_trees [DEBUG] createDir: created data_exploration/1.Data-exploration/4.Phylogenetic_distribution [DEBUG] contrast_plot.f using p_trait = adult_necropsy_count, missing p = 0 [DEBUG] contrast_plot.f ordered_species = 40 [DEBUG] contrast_plot.f rows = 40, trait = neoplasia_prevalence, taxon = family [DEBUG] contrast_plot.f columns: species, family, neoplasia_prevalence, neoplasia_necropsy, adult_necropsy_count, malignant_prevalence, LQ, taxa_mean, taxa_median, taxa_sd, taxa_q25, taxa_q75, value, g_mean, g_median, g_sd, g_q25, g_q75, outlier, extreme_outlier, global_label, taxa_outlier, extreme_taxa_outlier, taxa_label, taxa, common_name, p [DEBUG] violin_extremes.f rows = 40, trait = neoplasia_prevalence, taxon = family [DEBUG] violin_extremes.f ordered_species = 40 [DEBUG] asr_tree.f input rows = 40, trait_vec = 40 [DEBUG] asr_tree.f tree tips after prune = 40, nodes = 39 [DEBUG] asr_tree.f trait_vec after prune = 40 [DEBUG] asr_tree.f df after prune = 40 [DEBUG] asr_tree.f fit_bm aicc = -72.498, fit_lambda aicc = -88.723 [DEBUG] asr_tree.f chosen_model = lambda [DEBUG] asr_tree.f lambda_hat = 0.5700 [DEBUG] asr_tree.f sigma2_used = 0.000126 [DEBUG] asr_tree.f node_est = 39, tip_est = 40 [DEBUG] asr_tree.f contMap range = [0.0000, 0.3023] [DEBUG] asr_tree.f input rows = 40, trait_vec = 40 [DEBUG] asr_tree.f tree tips after prune = 40, nodes = 39 [DEBUG] asr_tree.f trait_vec after prune = 40 [DEBUG] asr_tree.f df after prune = 40 [DEBUG] asr_tree.f fit_bm aicc = -72.498, fit_lambda aicc = -88.723 [DEBUG] asr_tree.f chosen_model = lambda [DEBUG] asr_tree.f lambda_hat = 0.5700 [DEBUG] asr_tree.f sigma2_used = 0.000126 [DEBUG] asr_tree.f node_est = 39, tip_est = 40 [DEBUG] asr_tree.f contMap range = [0.0000, 0.3023]

sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-conda-linux-gnu
## Running under: TUXEDO OS
## 
## Matrix products: default
## BLAS/LAPACK: /home/miguel/micromamba/envs/caas_global_cancer/lib/libopenblasp-r0.3.28.so;  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=es_ES.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Madrid
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] rphylopic_1.5.0    geiger_2.0.11      phytools_2.4-4     maps_3.4.2.1      
##  [5] ggtreeExtra_1.16.0 phylolm_2.6.5      tidytree_0.4.6     treeio_1.30.0     
##  [9] colorspace_2.1-1   ggstar_1.0.4       ggnewscale_0.5.1   ggtree_3.14.0     
## [13] ggrepel_0.9.6      reshape2_1.4.4     ggplot2_3.5.1      ape_5.8-1         
## [17] tidyr_1.3.1        dplyr_1.1.4        readr_2.1.5       
## 
## loaded via a namespace (and not attached):
##  [1] mnormt_2.1.1            pbapply_1.7-2           gridExtra_2.3          
##  [4] phangorn_2.12.1         rlang_1.1.5             magrittr_2.0.3         
##  [7] compiler_4.4.1          png_0.1-8               systemfonts_1.2.1      
## [10] vctrs_0.6.5             combinat_0.0-8          quadprog_1.5-8         
## [13] stringr_1.5.1           crayon_1.5.3            pkgconfig_2.0.3        
## [16] fastmap_1.2.0           magick_2.8.6            labeling_0.4.3         
## [19] utf8_1.2.4              subplex_1.9             deSolve_1.40           
## [22] rmarkdown_2.29          tzdb_0.5.0              ragg_1.3.3             
## [25] purrr_1.0.4             xfun_0.51               cachem_1.1.0           
## [28] aplot_0.2.5             clusterGeneration_1.3.8 jsonlite_1.9.1         
## [31] jpeg_0.1-11             parallel_4.4.1          R6_2.6.1               
## [34] bslib_0.9.0             stringi_1.8.4           parallelly_1.43.0      
## [37] jquerylib_0.1.4         numDeriv_2016.8-1.1     Rcpp_1.0.14            
## [40] iterators_1.0.14        knitr_1.50              future.apply_1.11.3    
## [43] optimParallel_1.0-2     base64enc_0.1-3         Matrix_1.7-3           
## [46] igraph_2.1.4            tidyselect_1.2.1        yaml_2.3.10            
## [49] doParallel_1.0.17       codetools_0.2-20        curl_6.2.2             
## [52] listenv_0.9.1           lattice_0.22-6          tibble_3.2.1           
## [55] plyr_1.8.9              withr_3.0.2             ggimage_0.3.3          
## [58] coda_0.19-4.1           evaluate_1.0.3          gridGraphics_0.5-1     
## [61] future_1.34.0           pillar_1.10.1           foreach_1.5.2          
## [64] ggfun_0.1.8             generics_0.1.3          grImport2_0.3-3        
## [67] hms_1.1.3               munsell_0.5.1           scales_1.3.0           
## [70] globals_0.16.3          glue_1.8.0              scatterplot3d_0.3-44   
## [73] lazyeval_0.2.2          tools_4.4.1             fs_1.6.5               
## [76] mvtnorm_1.3-3           XML_3.99-0.18           fastmatch_1.1-6        
## [79] grid_4.4.1              nlme_3.1-167            patchwork_1.3.0        
## [82] cli_3.6.4               DEoptim_2.2-8           textshaping_1.0.0      
## [85] rsvg_2.6.1              expm_1.0-0              gtable_0.3.6           
## [88] yulab.utils_0.2.0       sass_0.4.9              digest_0.6.37          
## [91] ggplotify_0.1.2         farver_2.1.2            htmltools_0.5.8.1      
## [94] lifecycle_1.0.4         httr_1.4.7              MASS_7.3-65